Ken Furudate

In [ ]:
import scanpy as sc
import anndata as ad
import squidpy as sq

import numpy as np
import pandas as pd

import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.font_manager
plt.rcParams['font.sans-serif'] = ['Arial'] + plt.rcParams['font.sans-serif']
plt.rcParams["font.size"] = 20

import os
import seaborn as sns
from pathlib import Path

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

import warnings
warnings.filterwarnings('ignore')

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

from anndata import AnnData

import skimage
import tangram as tg
/usr/local/lib/python3.7/dist-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  defaults = yaml.load(f)
scanpy==1.9.1 anndata==0.8.0 umap==0.5.3 numpy==1.21.6 scipy==1.4.1 pandas==1.3.5 scikit-learn==1.0.2 statsmodels==0.13.2 python-igraph==0.9.10 pynndescent==0.5.7
squidpy==1.1.2
In [ ]:
sc.set_figure_params(facecolor="white", figsize=(10, 10), dpi_save=300)
sc.settings.verbosity = 3
In [ ]:
datadir="/data/spatial/"
In [ ]:
in_f = "integrated_data.h5ad"

data = sc.read_h5ad(datadir + in_f)
data
Out[ ]:
AnnData object with n_obs × n_vars = 3637 × 4000
    obs: 'in_tissue', 'array_row', 'array_col', 'imagecol', 'imagerow', 'pathology', 'category', 'cluster', 'sample', 'n_counts', 'batch', '_scvi_batch', '_scvi_labels', 'leiden', 'sample_density', '_scvi_raw_norm_scaling'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells-0-0', 'n_cells-1-0', 'n_cells-1', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: '_scvi', 'dendrogram_leiden', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pathology_colors', 'sample_colors', 'sample_density_params', 'umap', 'wilcoxon'
    obsm: 'X_pca', 'X_scvi', 'X_umap', 'spatial'
    layers: 'counts', 'scvi_expr'
    obsp: 'connectivities', 'distances'
In [ ]:
sample_name1="A"
sample_name2="B"
sample_name3="C"
sample_name4="D"
In [ ]:
adata = data[data.obs['sample'] == sample_name1]
bdata = data[data.obs['sample'] == sample_name2]
cdata = data[data.obs['sample'] == sample_name3]
In [ ]:
ddata = sc.read_h5ad(datadir + f"{sample_name4/)")

ddata.var_names_make_unique()
ddata.var_names
ddata
Out[ ]:
AnnData object with n_obs × n_vars = 970 × 17781
    obs: 'in_tissue', 'array_row', 'array_col', 'imagecol', 'imagerow', 'pathology', 'category', 'cluster', 'sample', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_hb', 'pct_counts_hb', 'n_counts', 'tile_path'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells'
    uns: 'category_colors', 'cluster_colors', 'gene_expression_correlation', 'morphological_distance', 'pathology_colors', 'pca', 'physical_distance', 'spatial', 'weights_matrix_all', 'weights_matrix_gd_md', 'weights_matrix_pd_gd', 'weights_matrix_pd_md'
    obsm: 'X_morphology', 'X_pca', 'X_tile_feature', 'imputed_data', 'raw_SME_normalized', 'spatial', 'top_weights'
    varm: 'PCs'
In [ ]:
in_f1 = "SampleA_cluster_patho.annot_region.txt"
in_f2 = "SampleB_cluster_patho.annot_region.txt"
in_f3 = "SampleC_cluster_patho.annot_region.txt"
in_f4 = "SampleD_cluster_patho.annot_region.txt"
In [ ]:
a_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f1), index_col="Barcode")
b_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f2), index_col="Barcode")
c_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f3), index_col="Barcode")
d_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f4), index_col="Barcode")
In [ ]:
adata.obs['pathology'] = a_annot["patho_diag"]
adata.obs['category'] = a_annot["category"]
adata.obs['cluster'] = a_annot["graph-based"]

bdata.obs['pathology'] = b_annot["patho_diag"]
bdata.obs['category'] = b_annot["category"]
bdata.obs['cluster'] = b_annot["graph-based"]

cdata.obs['pathology'] = c_annot["patho_diag"]
cdata.obs['category'] = c_annot["category"]
cdata.obs['cluster'] = c_annot["graph-based"]

ddata.obs['pathology'] = d_annot["patho_diag"]
ddata.obs['category'] = d_annot["category"]
ddata.obs['cluster'] = d_annot["graph-based"]
In [ ]:
adata.obs['sample'] = sample_name1
bdata.obs['sample'] = sample_name2
cdata.obs['sample'] = sample_name3
ddata.obs['sample'] = sample_name4
In [ ]:
adata_tumor = adata[adata.obs['category']=="tumor"]
bdata_tumor = bdata[bdata.obs['category']=="tumor"]
cdata_tumor = cdata[cdata.obs['category']=="tumor"]
ddata_tumor = ddata[ddata.obs['category']=="tumor"]
In [ ]:
sc.pp.filter_cells(adata_tumor, min_counts=500)
sc.pp.filter_cells(bdata_tumor, min_counts=500)
sc.pp.filter_cells(cdata_tumor, min_counts=500)
sc.pp.filter_cells(ddata_tumor, min_counts=500)

sc.pp.filter_genes(adata_tumor, min_cells=2)
sc.pp.filter_genes(bdata_tumor, min_cells=2)
sc.pp.filter_genes(cdata_tumor, min_cells=2)
sc.pp.filter_genes(ddata_tumor, min_cells=2)
filtered out 4 cells that have less than 500 counts
In [ ]:
def set_param(input_data, sample):
  scale = input_data.uns['spatial'][f"{sample}"]['scalefactors']['tissue_hires_scalef']
  img = sq.im.ImageContainer(input_data.uns['spatial'][f"{sample}"]['images']['hires'],
                           scale=scale, 
                           library_id=f"{sample}")
  img.show()
  return scale, img
In [ ]:
scale_a, img_a = set_param(adata_tumor, sample_a)
scale_b, img_b = set_param(bdata_tumor, sample_b)
scale_c, img_c = set_param(cdata_tumor, sample_c)
scale_d, img_d = set_param(ddata_tumor, sample_d)
Adding image layer `image`
Adding image layer `image`
Adding image layer `image`
Adding image layer `image`
In [ ]:
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_a.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_a["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_b["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_a["image"][:, :, 0, 0]<0.6)
plt.tight_layout()
In [ ]:
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_b.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_b["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_b["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_b["image"][:, :, 0, 0]<0.58)
plt.tight_layout()
In [ ]:
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_c.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_c["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_c["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_c["image"][:, :, 0, 0]<0.78)
plt.tight_layout()
In [ ]:
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_d.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_d["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_d["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_d["image"][:, :, 0, 0]<0.65)
plt.tight_layout()
In [ ]:
def segmentation_features(img, input_data, thresh):
  sq.im.process(img=img, layer="image", method="smooth")
  sq.im.segment(
      img=img,
      layer="image",
      method="watershed",
      channel=0,
      thresh=thresh, 
      geq=False
      )

  # define image layer to use for segmentation
  features_kwargs = {
      "segmentation": {
        "label_layer": "segmented_watershed",
        "props": ["label", "centroid"],
        "channels": [1, 2],
        }
      }

# calculate segmentation features
  sq.im.calculate_image_features(
      adata=input_data,
      img=img,
      layer="image",
      key_added="image_features",
      features_kwargs=features_kwargs,
      features="segmentation",
      mask_circle=True,
      n_jobs=2
      )
In [ ]:
segmentation_features(img_a, adata_tumor, 0.6)
segmentation_features(img_b, bdata_tumor, 0.58)
segmentation_features(img_c, cdata_tumor, 0.78)
segmentation_features(img_d, ddata_tumor, 0.65)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1907, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:01:19)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)
Adding `adata.obsm['image_features']`
Finish (0:00:11)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1789, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:00:53)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)
Adding `adata.obsm['image_features']`
Finish (0:00:03)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1788, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:00:57)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)
Adding `adata.obsm['image_features']`
Finish (0:00:12)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1978, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:01:20)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)
Adding `adata.obsm['image_features']`
Finish (0:00:06)
In [ ]:
adata_tumor.obs["cell_count"] = adata_tumor.obsm["image_features"]["segmentation_label"]
bdata_tumor.obs["cell_count"] = bdata_tumor.obsm["image_features"]["segmentation_label"]
cdata_tumor.obs["cell_count"] = cdata_tumor.obsm["image_features"]["segmentation_label"]
ddata_tumor.obs["cell_count"] = ddata_tumor.obsm["image_features"]["segmentation_label"]
In [ ]:
in_f = "oscc_sc_data.h5ad"

adata_sc = sc.read_h5ad(datadir + in_f)
adata_sc
Out[ ]:
AnnData object with n_obs × n_vars = 5467 × 21859
    obs: 'label', 'sample_id', 'metastasis', 'site', 'condition', 'n_genes', 'percent_mito', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_hb', 'pct_counts_hb', 'leiden'
    var: 'n_cells', 'mt', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'condition_colors', 'hvg', 'label_colors', 'leiden', 'neighbors', 'pca', 'rank_genes_groups', 'sample_id_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
In [ ]:
sc.tl.rank_genes_groups(adata_sc, groupby="label", method="wilcoxon", use_raw=False)
markers_df = pd.DataFrame(adata_sc.uns["rank_genes_groups"]["names"]).iloc[0:100, :]
markers_df
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:09)
Out[ ]:
B cell CAF CRC DC EC MAF MSC Mac Mst Myo T cell
0 SSR4 SERPING1 KRT5 LAMP3 PECAM1 TAGLN NNMT HLA-DRA TPSB2 NEB PTPRC
1 MZB1 DCN KRT17 CCR7 S1PR1 TNS1 CHRNA1 HLA-DRB1 TPSAB1 ACTC1 TMSB4X
2 DERL3 C1R FXYD3 BIRC3 HYAL2 MYL9 RASD1 HLA-DQB1 CPA3 DES ARHGDIB
3 FKBP11 C1S PERP LY75 CDH5 ADAMTS1 CD63 CD74 TPSD1 FLNC STK17B
4 HERPUD1 COL1A2 DSP CD74 ENG CALD1 MEG3 HLA-DPA1 KIT H19 B2M
... ... ... ... ... ... ... ... ... ... ... ...
95 MALAT1 PRKCDBP TKT PAN3 COL4A1 MAP1B CLU CECR1 PAG1 APOBEC2 DAND5
96 ICAM2 TIMP3 ANXA8L1 TMSB4X PRSS23 EDNRA IP6K3 TLR2 CDK15 PADI2 L1TD1
97 RRBP1 DIO2 TNS4 HCK SPRY1 GSN MT1M SRGN MLPH MLIP CYTIP
98 RNF122 ARID5B SNRPE GLS DARC HSPB8 FOXO3 MSR1 LAX1 PGAM2 AKAP5
99 KLHL6 CD248 KRT6B STARD7 SCARF1 MOCS1 IFITM2 PPT1 RIN3 TBX15 AGMAT

100 rows × 11 columns

In [ ]:
markers_df.to_csv(str(OUT_PATH) + OUTPUT1, sep='\t', index=True)
In [ ]:
genes_sc = np.unique(markers_df.melt().value.values)
genes_st = adata_tumor.var_names.values
markers = list(set(genes_sc).intersection(set(genes_st)))
len(markers)
Out[ ]:
338
In [ ]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device
Out[ ]:
'cuda'
In [ ]:
data_ = None
data_ = ddata_tumor.copy()

ad_map = None
annotation_list = list()
df_all_genes = pd.DataFrame()
  
tg.pp_adatas(adata_sc, data_, genes=markers)

ad_map = tg.map_cells_to_space(adata_sc=adata_sc,     # scRNA-seq
                               adata_sp=data_,          # spatial data 
                               mode="constrained",
                               cluster_label='label',    # .obs field w cell types
                               target_count=data_.obs.cell_count.sum(),
                               density_prior=np.array(data_.obs.cell_count) / data_.obs.cell_count.sum(),
                               #target_count=data_.obs["rna_count_based_density"].sum(),
                               #density_prior='rna_count_based', # defalt
                               scale=True,
                               num_epochs=10000,
                               device=device,
                               learning_rate=0.1,
                               random_state=42
                               )

# Cell type maps
tg.project_cell_annotations(adata_map=ad_map, adata_sp=data_, annotation="label", threshold=0.5)
annotation_list = list(pd.unique(adata_sc.obs['label']))
data_.obs = pd.concat([data_.obs, data_.obsm["tangram_ct_pred"]], axis=1)

sc.pl.spatial(data_,
                color=annotation_list,
                vmax="p85",
                color_map="viridis",
                legend_fontsize=20,
                )

tg.plot_training_scores(ad_map, bins=20, alpha=.5)

ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc)
df_all_genes = tg.compare_spatial_geneexp(ad_ge, data_, adata_sc)
tg.plot_auc(df_all_genes)

tg.create_segment_cell_df(data_)
tg.count_cell_annotations(
    ad_map,
    adata_sc,
    data_,
    annotation="label",
    )

data_segment = tg.deconvolve_cell_annotations(data_)

sc.pl.spatial(
    data_segment[(data_segment.obs["cluster"] == "CRC" ) | (data_segment.obs["cluster"] == "CAF") | (data_segment.obs["cluster"] == "T cell")],
    color="cluster",
    size=0.05,
    show=True,
    frameon=True,
    alpha_img=0.05,
    legend_fontsize=20,
    )

plt.show()